Set-up
library(phyloseq)
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(RColorBrewer)
library(speedyseq)
##
## Attaching package: 'speedyseq'
##
## The following objects are masked from 'package:phyloseq':
##
## filter_taxa, plot_bar, plot_heatmap, plot_tree, psmelt, tax_glom,
## tip_glom, transform_sample_counts
## here() starts at /Users/katebowie/Library/CloudStorage/OneDrive-YaleUniversity/MrOS/OHSU
library(microshades)
library(qwraps2)
##
## Attaching package: 'qwraps2'
##
## The following object is masked from 'package:ggplot2':
##
## mean_se
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(tableone)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-8
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:patchwork':
##
## align_plots
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(ggbeeswarm)
library(ggpubr)
##
## Attaching package: 'ggpubr'
##
## The following object is masked from 'package:cowplot':
##
## get_legend
##
## The following objects are masked from 'package:qwraps2':
##
## mean_ci, mean_sd, median_iqr
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
Age groups - beta diversity
adonis2(dist_wu_ps~ age_group, data = meta_ps)
| Model |
2 |
0.3756496 |
0.0073437 |
1.745935 |
0.039 |
| Residual |
472 |
50.7769842 |
0.9926563 |
NA |
NA |
| Total |
474 |
51.1526339 |
1.0000000 |
NA |
NA |
adonis2(dist_u_ps~ age_group, data = meta_ps)
| Model |
2 |
0.6016895 |
0.0063459 |
1.507192 |
0.029 |
| Residual |
472 |
94.2140633 |
0.9936541 |
NA |
NA |
| Total |
474 |
94.8157528 |
1.0000000 |
NA |
NA |
adonis2(dist_b_ps~ age_group, data = meta_ps)
| Model |
2 |
1.163632 |
0.0068451 |
1.626567 |
0.022 |
| Residual |
472 |
168.832333 |
0.9931549 |
NA |
NA |
| Total |
474 |
169.995965 |
1.0000000 |
NA |
NA |
Age - with top Phyla
# Firmicutes
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Firmicutes")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
shapiro.test(ps_melt$Abundance)
##
## Shapiro-Wilk normality test
##
## data: ps_melt$Abundance
## W = 0.96116, p-value = 0.0000000007126
ggplot(ps_melt, aes(x = age, y = Abundance)) + geom_point() + stat_smooth(method = "lm", col = "red")
## `geom_smooth()` using formula = 'y ~ x'

res <- cor.test(ps_melt$age, ps_melt$Abundance,
method = "spearman") #pearson is for normal, spearman is for non-normal
## Warning in cor.test.default(ps_melt$age, ps_melt$Abundance, method =
## "spearman"): Cannot compute exact p-value with ties
## [1] 0.7419283
# Proteobacteria
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Proteobacteria")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
shapiro.test(ps_melt$Abundance)
##
## Shapiro-Wilk normality test
##
## data: ps_melt$Abundance
## W = 0.84118, p-value < 0.00000000000000022
ggplot(ps_melt, aes(x = age, y = Abundance)) + geom_point() + stat_smooth(method = "lm", col = "red")
## `geom_smooth()` using formula = 'y ~ x'

res <- cor.test(ps_melt$age, ps_melt$Abundance,
method = "spearman") #pearson is for normal, spearman is for non-normal
## Warning in cor.test.default(ps_melt$age, ps_melt$Abundance, method =
## "spearman"): Cannot compute exact p-value with ties
## [1] 0.3939888
# Actinobacteria
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Actinobacteria")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
shapiro.test(ps_melt$Abundance)
##
## Shapiro-Wilk normality test
##
## data: ps_melt$Abundance
## W = 0.80902, p-value < 0.00000000000000022
ggplot(ps_melt, aes(x = age, y = Abundance)) + geom_point() + stat_smooth(method = "lm", col = "red")
## `geom_smooth()` using formula = 'y ~ x'

res <- cor.test(ps_melt$age, ps_melt$Abundance,
method = "spearman") #pearson is for normal, spearman is for non-normal
## Warning in cor.test.default(ps_melt$age, ps_melt$Abundance, method =
## "spearman"): Cannot compute exact p-value with ties
## [1] 0.07209791
# Bacteroidetes
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Bacteroidetes")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
shapiro.test(ps_melt$Abundance)
##
## Shapiro-Wilk normality test
##
## data: ps_melt$Abundance
## W = 0.77261, p-value < 0.00000000000000022
ggplot(ps_melt, aes(x = age, y = Abundance)) + geom_point() + stat_smooth(method = "lm", col = "red")
## `geom_smooth()` using formula = 'y ~ x'

res <- cor.test(ps_melt$age, ps_melt$Abundance,
method = "spearman") #pearson is for normal, spearman is for non-normal
## Warning in cor.test.default(ps_melt$age, ps_melt$Abundance, method =
## "spearman"): Cannot compute exact p-value with ties
## [1] 0.1918584
Enrollment site
alpha diversity
plot_richness(ps, x = "SITE", color = "SITE", scales = "free_y", measures=c("Observed", "Shannon", "InvSimpson")) + geom_boxplot()

BMI (<=25 kg/m2, 25-29.9 kg/m2, >30 kg/m2); check numbers
# creating bmi_groups
meta_ps$BMI_group <- 'healthy_weight'
meta_ps$BMI_group[meta_ps$HWBMI.x >= 25] <- 'overweight'
meta_ps$BMI_group[meta_ps$HWBMI.x >= 30] <- 'obesity'
table(meta_ps$BMI_group)
##
## healthy_weight obesity overweight
## 104 118 253
ggplot(meta_ps, aes(x = HWBMI.x)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

sample_data(ps_g) <- meta_ps
sample_data(ps_n) <- meta_ps
sample_data(ps) <- meta_ps
BMI Adonis
adonis2(dist_wu_ps~ BMI_group, data = meta_ps, permutations = 10000)
| Model |
2 |
0.4641226 |
0.0090733 |
2.160902 |
0.0083992 |
| Residual |
472 |
50.6885113 |
0.9909267 |
NA |
NA |
| Total |
474 |
51.1526339 |
1.0000000 |
NA |
NA |
adonis2(dist_u_ps~ BMI_group, data = meta_ps)
| Model |
2 |
0.5775524 |
0.0060913 |
1.44636 |
0.032 |
| Residual |
472 |
94.2382005 |
0.9939087 |
NA |
NA |
| Total |
474 |
94.8157528 |
1.0000000 |
NA |
NA |
adonis2(dist_b_ps~ BMI_group, data = meta_ps, permutations = 10000)
| Model |
2 |
1.450256 |
0.0085311 |
2.030668 |
0.0016998 |
| Residual |
472 |
168.545708 |
0.9914689 |
NA |
NA |
| Total |
474 |
169.995965 |
1.0000000 |
NA |
NA |
pairwise
healthy weight v overweight
What I’ve found is obesity is significantly different from both
healthy and overweight. Overweight and healthy weight are not
significantly different.
BMI with beta div
# adding ordination plot
ps_ord <- ordinate(ps_g, method = "NMDS", distance = "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2487638
## Run 1 stress 0.2500495
## Run 2 stress 0.2506223
## Run 3 stress 0.2544275
## Run 4 stress 0.2936727
## Run 5 stress 0.2490637
## ... Procrustes: rmse 0.02405374 max resid 0.3977212
## Run 6 stress 0.2497193
## Run 7 stress 0.249183
## ... Procrustes: rmse 0.02611945 max resid 0.4501689
## Run 8 stress 0.256932
## Run 9 stress 0.2511705
## Run 10 stress 0.2488965
## ... Procrustes: rmse 0.01979155 max resid 0.3074272
## Run 11 stress 0.2500646
## Run 12 stress 0.25015
## Run 13 stress 0.249494
## Run 14 stress 0.2485425
## ... New best solution
## ... Procrustes: rmse 0.01830247 max resid 0.2877571
## Run 15 stress 0.2495614
## Run 16 stress 0.2492315
## Run 17 stress 0.250205
## Run 18 stress 0.269858
## Run 19 stress 0.2498718
## Run 20 stress 0.2496044
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 15: no. of iterations >= maxit
## 5: stress ratio > sratmax
ord_plot <- plot_ordination(ps_g, ps_ord, color = "age") +
geom_point(size = 2) +
#coord_fixed(xlim = c(-1.8, 1.8), ylim = c(-1.8,1.8)) +
ggtitle(paste("PCoA, weighted UNIFRAC")) + facet_wrap(~BMI_group)
plot(ord_plot)

BMI continuous variable with alpha div
# testing Observed
ggplot(meta_ps, aes(x = HWBMI.x, y = Observed)) + geom_point() + stat_smooth(method = "lm", col = "red")
## `geom_smooth()` using formula = 'y ~ x'

res <- cor.test(meta_ps$HWBMI.x, meta_ps$Observed,
method = "spearman") #pearson is for normal, spearman is for non-normal
## Warning in cor.test.default(meta_ps$HWBMI.x, meta_ps$Observed, method =
## "spearman"): Cannot compute exact p-value with ties
## [1] 0.242052
# testing Shannon
ggplot(meta_ps, aes(x = HWBMI.x, y = Shannon)) + geom_point() + stat_smooth(method = "lm", col = "red")
## `geom_smooth()` using formula = 'y ~ x'

res <- cor.test(meta_ps$HWBMI.x, meta_ps$Shannon,
method = "spearman") #pearson is for normal, spearman is for non-normal
## Warning in cor.test.default(meta_ps$HWBMI.x, meta_ps$Shannon, method =
## "spearman"): Cannot compute exact p-value with ties
## [1] 0.8992064
# testing InvSimpson
ggplot(meta_ps, aes(x = HWBMI.x, y = InvSimpson)) + geom_point() + stat_smooth(method = "lm", col = "red")
## `geom_smooth()` using formula = 'y ~ x'

res <- cor.test(meta_ps$HWBMI.x, meta_ps$InvSimpson,
method = "spearman") #pearson is for normal, spearman is for non-normal
## Warning in cor.test.default(meta_ps$HWBMI.x, meta_ps$InvSimpson, method =
## "spearman"): Cannot compute exact p-value with ties
## [1] 0.894394
# Testing pielou
ggplot(meta_ps, aes(x = HWBMI.x, y = pielou)) + geom_point() + stat_smooth(method = "lm", col = "red")
## `geom_smooth()` using formula = 'y ~ x'

res <- cor.test(meta_ps$HWBMI.x, meta_ps$pielou,
method = "spearman") #pearson is for normal, spearman is for non-normal
## Warning in cor.test.default(meta_ps$HWBMI.x, meta_ps$pielou, method =
## "spearman"): Cannot compute exact p-value with ties
## [1] 0.6109389
BMI groups variable with alpha div
kruskal.test(meta_ps$Observed ~ meta_ps$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: meta_ps$Observed by meta_ps$BMI_group
## Kruskal-Wallis chi-squared = 0.91607, df = 2, p-value = 0.6325
kruskal.test(meta_ps$Shannon ~ meta_ps$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: meta_ps$Shannon by meta_ps$BMI_group
## Kruskal-Wallis chi-squared = 1.4866, df = 2, p-value = 0.4755
kruskal.test(meta_ps$InvSimpson ~ meta_ps$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: meta_ps$InvSimpson by meta_ps$BMI_group
## Kruskal-Wallis chi-squared = 2.3505, df = 2, p-value = 0.3087
kruskal.test(meta_ps$pielou ~ meta_ps$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: meta_ps$pielou by meta_ps$BMI_group
## Kruskal-Wallis chi-squared = 4.2825, df = 2, p-value = 0.1175
BMI alpha div values
meta_ps %>%
group_by(BMI_group) %>%
summarise(med_obs = quantile(Observed, probs = c(0.25,0.5,0.75)), med_shan = quantile(Shannon, probs = c(0.25,0.5,0.75)), med_inv = quantile(InvSimpson, probs = c(0.25,0.5,0.75)), med_pie = quantile(pielou, probs = c(0.25,0.5,0.75)))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'BMI_group'. You can override using the
## `.groups` argument.
| healthy_weight |
16.0 |
1.685743 |
2.979081 |
0.5540822 |
| healthy_weight |
22.0 |
2.087516 |
5.017265 |
0.6613625 |
| healthy_weight |
30.0 |
2.395938 |
7.104957 |
0.7437547 |
| obesity |
18.0 |
1.612226 |
3.245733 |
0.5621228 |
| obesity |
24.5 |
2.060327 |
4.916635 |
0.6391371 |
| obesity |
30.0 |
2.361623 |
7.028608 |
0.7407123 |
| overweight |
16.0 |
1.702340 |
3.472576 |
0.5767366 |
| overweight |
22.0 |
2.136180 |
5.453378 |
0.6860155 |
| overweight |
29.0 |
2.409417 |
7.367755 |
0.7596021 |
meta_ps %>%
group_by(BMI_group) %>%
summarise(mean_obs = mean(Observed), mean_shan = mean(Shannon), mean_inv = mean(InvSimpson), mean_pie = mean(pielou))
| healthy_weight |
24.13462 |
1.972215 |
5.457860 |
0.6315111 |
| obesity |
23.98305 |
1.952268 |
5.438977 |
0.6213352 |
| overweight |
23.32016 |
2.022743 |
5.878325 |
0.6548878 |
Top genera and BMI
ps_n_genus <- tax_glom(ps_n, "Genus")
ps_melt <- psmelt(ps_n_genus)
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
summary_genus <- ps_melt %>%
group_by(Genus, BMI_group) %>%
summarise(Abundance = Abundance, rank_abundance = sum(Abundance), mean_abundance = mean(Abundance), median_abundance = median(Abundance), min_abundance = min(Abundance), max_abundance = max(Abundance)) %>%
arrange(desc(rank_abundance)) %>%
mutate(order = row_number())
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Genus', 'BMI_group'. You can override
## using the `.groups` argument.
# Staphylococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Staphylococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BMI_group
## Kruskal-Wallis chi-squared = 10.294, df = 2, p-value = 0.005816
pairwise.wilcox.test(ps_melt$Abundance, ps_melt$BMI_group, p.adjust.method = "fdr")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: ps_melt$Abundance and ps_melt$BMI_group
##
## healthy_weight obesity
## obesity 0.0065 -
## overweight 0.1901 0.0226
##
## P value adjustment method: fdr
# Neisseria
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Neisseria")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BMI_group
## Kruskal-Wallis chi-squared = 2.1258, df = 2, p-value = 0.3454
# Corynebacterium
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Corynebacterium")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BMI_group
## Kruskal-Wallis chi-squared = 9.5717, df = 2, p-value = 0.008347
pairwise.wilcox.test(ps_melt$Abundance, ps_melt$BMI_group, p.adjust.method = "fdr")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: ps_melt$Abundance and ps_melt$BMI_group
##
## healthy_weight obesity
## obesity 0.0057 -
## overweight 0.1469 0.0526
##
## P value adjustment method: fdr
# Prevotella
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Prevotella")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BMI_group
## Kruskal-Wallis chi-squared = 4.0539, df = 2, p-value = 0.1317
# Streptococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Streptococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BMI_group
## Kruskal-Wallis chi-squared = 19.441, df = 2, p-value = 0.00006003
pairwise.wilcox.test(ps_melt$Abundance, ps_melt$BMI_group, p.adjust.method = "fdr")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: ps_melt$Abundance and ps_melt$BMI_group
##
## healthy_weight obesity
## obesity 0.00040 -
## overweight 0.67509 0.00012
##
## P value adjustment method: fdr
# Bacteroides
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Bacteroides")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BMI_group
## Kruskal-Wallis chi-squared = 2.5372, df = 2, p-value = 0.2812
# Anaerococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Anaerococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BMI_group
## Kruskal-Wallis chi-squared = 0.20641, df = 2, p-value = 0.9019
# Haemophilus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Haemophilus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BMI_group
## Kruskal-Wallis chi-squared = 1.6184, df = 2, p-value = 0.4452
# Escherichia/Shigella
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Escherichia/Shigella")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BMI_group
## Kruskal-Wallis chi-squared = 0.93119, df = 2, p-value = 0.6278
# Finegoldia
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Finegoldia")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BMI_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BMI_group
## Kruskal-Wallis chi-squared = 1.2924, df = 2, p-value = 0.524
plot
BMI_genera is saved to be put together with another plot for Figure 1
during HAllA analysis.
summary_genus$BMI_title <- "overweight"
summary_genus$BMI_title[summary_genus$BMI_group == "healthy_weight"] <- "healthy \nweight"
summary_genus$BMI_title[summary_genus$BMI_group == "obesity"] <- "obese"
summary_genus$BMI_title <- factor(summary_genus$BMI_title, levels = c("healthy \nweight", "overweight", "obese"))
summary_genus$BMI_group <- factor(summary_genus$BMI_group, levels = c("healthy_weight", "overweight", "obesity"))
plot <- summary_genus %>%
filter(Genus %in% c("Staphylococcus", "Corynebacterium", "Streptococcus")) %>%
ggplot(., aes(x = BMI_title, y = Abundance, fill = BMI_title)) +
geom_beeswarm() +
geom_boxplot(alpha = 0.7) +
xlab("") +
facet_wrap(~Genus) +
theme_bw() +
theme(strip.background = element_rect(colour="black", fill="white")) +
theme(legend.position = "none")
my_comparisons <- list( c("healthy \nweight", "overweight"), c("healthy \nweight", "obese"), c("overweight", "obese"))
BMI_genera <- plot + stat_compare_means(comparisons = my_comparisons) +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.15)))
BMI_genera

summary_genus %>%
filter(Genus %in% c("Staphylococcus", "Corynebacterium", "Streptococcus", "Neisseria", "Prevotella", "Anaerococcus", "Bacteroides", "Finegoldia", "Escherichia/Shigella", "Haemophilus")) %>%
ggplot(., aes(x = Genus, y = Abundance, fill = BMI_title)) + geom_boxplot() + facet_wrap(~Genus, scales = "free") + xlab("") + guides(fill=guide_legend(title="BMI"))

Top genera and BPH
ps_melt <- psmelt(ps_n_genus)
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
summary_genus <- ps_melt %>%
group_by(Genus, BPH) %>%
summarise(Abundance = Abundance, rank_abundance = sum(Abundance), mean_abundance = mean(Abundance), median_abundance = median(Abundance), min_abundance = min(Abundance), max_abundance = max(Abundance)) %>%
arrange(desc(rank_abundance)) %>%
mutate(order = row_number())
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Genus', 'BPH'. You can override using the
## `.groups` argument.
# Staphylococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Staphylococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BPH)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BPH
## Kruskal-Wallis chi-squared = 0.011221, df = 1, p-value = 0.9156
# Neisseria
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Neisseria")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BPH)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BPH
## Kruskal-Wallis chi-squared = 0.3368, df = 1, p-value = 0.5617
# Corynebacterium
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Corynebacterium")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BPH)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BPH
## Kruskal-Wallis chi-squared = 0.45521, df = 1, p-value = 0.4999
# Prevotella
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Prevotella")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BPH)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BPH
## Kruskal-Wallis chi-squared = 0.076543, df = 1, p-value = 0.782
# Streptococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Streptococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BPH)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BPH
## Kruskal-Wallis chi-squared = 1.1512, df = 1, p-value = 0.2833
# Bacteroides
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Bacteroides")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BPH)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BPH
## Kruskal-Wallis chi-squared = 0.59891, df = 1, p-value = 0.439
# Anaerococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Anaerococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BPH)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BPH
## Kruskal-Wallis chi-squared = 0.44092, df = 1, p-value = 0.5067
# Haemophilus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Haemophilus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BPH)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BPH
## Kruskal-Wallis chi-squared = 0.35104, df = 1, p-value = 0.5535
# Escherichia/Shigella
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Escherichia/Shigella")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BPH)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BPH
## Kruskal-Wallis chi-squared = 0.21031, df = 1, p-value = 0.6465
# Finegoldia
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Finegoldia")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$BPH)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$BPH
## Kruskal-Wallis chi-squared = 0.017165, df = 1, p-value = 0.8958
plot
summary_genus %>%
filter(Genus %in% c("Staphylococcus", "Corynebacterium", "Streptococcus")) %>%
ggplot(., aes(x = Genus, y = Abundance, fill = BPH)) + geom_boxplot() + facet_wrap(~Genus, scales = "free")

summary_genus %>%
filter(Genus %in% c("Staphylococcus", "Corynebacterium", "Streptococcus", "Neisseria", "Prevotella", "Anaerococcus", "Bacteroides", "Finegoldia", "Escherichia/Shigella", "Haemophilus")) %>%
ggplot(., aes(x = Genus, y = Abundance, fill = BPH)) + geom_boxplot() + facet_wrap(~Genus, scales = "free")

Top genera and age
ps_melt <- psmelt(ps_n_genus)
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
summary_genus <- ps_melt %>%
group_by(Genus, age_group) %>%
summarise(Abundance = Abundance, rank_abundance = sum(Abundance), mean_abundance = mean(Abundance), median_abundance = median(Abundance), min_abundance = min(Abundance), max_abundance = max(Abundance)) %>%
arrange(desc(rank_abundance)) %>%
mutate(order = row_number())
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Genus', 'age_group'. You can override
## using the `.groups` argument.
# Staphylococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Staphylococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 0.60704, df = 2, p-value = 0.7382
# Neisseria
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Neisseria")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 1.5524, df = 2, p-value = 0.4601
# Corynebacterium
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Corynebacterium")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 3.9896, df = 2, p-value = 0.136
# Prevotella
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Prevotella")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 0.052454, df = 2, p-value = 0.9741
# Streptococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Streptococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 4.4964, df = 2, p-value = 0.1056
# Bacteroides
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Bacteroides")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 6.5924, df = 2, p-value = 0.03702
pairwise.wilcox.test(ps_melt$Abundance, ps_melt$age_group, p.adjust.method = "fdr")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: ps_melt$Abundance and ps_melt$age_group
##
## age1 age2
## age2 0.059 -
## age3 0.059 0.903
##
## P value adjustment method: fdr
# Anaerococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Anaerococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 4.5813, df = 2, p-value = 0.1012
# Haemophilus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Haemophilus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 4.0821, df = 2, p-value = 0.1299
# Escherichia/Shigella
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Escherichia/Shigella")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 0.28786, df = 2, p-value = 0.8659
# Finegoldia
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Finegoldia")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 3.9387, df = 2, p-value = 0.1395
Bacteroides is significant, however once we do multiple testing
corrections, it’s no longer significant.
Top phylum and Age
ps_n_phy <- tax_glom(ps_n, "Phylum")
ps_melt <- psmelt(ps_n_phy)
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
summary_phylum <- ps_melt %>%
group_by(Phylum, age_group) %>%
summarise(rank_abundance = sum(Abundance), mean_abundance = mean(Abundance), median_abundance = median(Abundance), min_abundance = min(Abundance), max_abundance = max(Abundance), Abundance = Abundance) %>%
arrange(desc(rank_abundance)) %>%
mutate(order = row_number()) %>%
ungroup()
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Phylum', 'age_group'. You can override
## using the `.groups` argument.
# Firmicutes
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Firmicutes")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 0.12833, df = 2, p-value = 0.9379
# Proteobacteria
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Proteobacteria")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 0.53725, df = 2, p-value = 0.7644
# Actinobacteria
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Actinobacteria")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 7.0943, df = 2, p-value = 0.02881
pairwise.wilcox.test(ps_melt$Abundance, ps_melt$age_group, p.adjust.method = "fdr")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: ps_melt$Abundance and ps_melt$age_group
##
## age1 age2
## age2 0.346 -
## age3 0.081 0.034
##
## P value adjustment method: fdr
# Bacteroidetes
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Bacteroidetes")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 2.4174, df = 2, p-value = 0.2986
# Fusobacteria
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Cyanobacteria")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$age_group)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$age_group
## Kruskal-Wallis chi-squared = 1.1149, df = 2, p-value = 0.5727
plot
summary_phylum %>%
filter(Phylum == "Actinobacteria") %>%
ggplot(., aes(x = Phylum, y = Abundance, fill = age_group)) + geom_boxplot()

summary_phylum %>%
filter(Phylum %in% c("Firmicutes", "Proteobacteria", "Actinobacteria", "Bacteroidetes", "Cyanobacteria")) %>%
ggplot(., aes(x = Phylum, y = Abundance, fill = age_group)) + geom_boxplot() + facet_wrap(~Phylum, scales = "free") + scale_fill_discrete(name = "Age range", labels = c("65-70", "71-75", "76-90"))

Cancer status alpha div
kruskal.test(meta_ps$Observed ~ meta_ps$Cancer_Type)
##
## Kruskal-Wallis rank sum test
##
## data: meta_ps$Observed by meta_ps$Cancer_Type
## Kruskal-Wallis chi-squared = 0.36547, df = 2, p-value = 0.833
kruskal.test(meta_ps$Shannon ~ meta_ps$Cancer_Type)
##
## Kruskal-Wallis rank sum test
##
## data: meta_ps$Shannon by meta_ps$Cancer_Type
## Kruskal-Wallis chi-squared = 0.48699, df = 2, p-value = 0.7839
kruskal.test(meta_ps$InvSimpson ~ meta_ps$Cancer_Type)
##
## Kruskal-Wallis rank sum test
##
## data: meta_ps$InvSimpson by meta_ps$Cancer_Type
## Kruskal-Wallis chi-squared = 1.1955, df = 2, p-value = 0.5501
kruskal.test(meta_ps$pielou ~ meta_ps$Cancer_Type)
##
## Kruskal-Wallis rank sum test
##
## data: meta_ps$pielou by meta_ps$Cancer_Type
## Kruskal-Wallis chi-squared = 2.2425, df = 2, p-value = 0.3259
No cancer associations when it comes to alpha diversity.
Race, Diabetes, Prostatitis, BPH
alpha div for table
# Prostatitis
meta_ps %>%
group_by(Prostatitis) %>%
summarise(mean_obs = mean(Observed), mean_shan = mean(Shannon), mean_inv =
mean(InvSimpson), mean_pie = mean(pielou), med_obs = quantile(Observed, probs = c(0.25,0.5,0.75)), med_shan = quantile(Shannon, probs = c(0.25,0.5,0.75)), med_inv = quantile(InvSimpson, probs = c(0.25,0.5,0.75)), med_pie = quantile(pielou, probs = c(0.25,0.5,0.75)))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Prostatitis'. You can override using the
## `.groups` argument.
| No |
23.56757 |
1.980614 |
5.609384 |
0.6359285 |
17 |
1.700752 |
3.321660 |
0.5578825 |
| No |
23.56757 |
1.980614 |
5.609384 |
0.6359285 |
23 |
2.093647 |
5.088242 |
0.6629334 |
| No |
23.56757 |
1.980614 |
5.609384 |
0.6359285 |
30 |
2.396817 |
7.204845 |
0.7531488 |
| Yes |
24.00000 |
2.041951 |
5.915818 |
0.6608361 |
16 |
1.629969 |
3.363575 |
0.5840326 |
| Yes |
24.00000 |
2.041951 |
5.915818 |
0.6608361 |
22 |
2.136180 |
5.455359 |
0.6862615 |
| Yes |
24.00000 |
2.041951 |
5.915818 |
0.6608361 |
31 |
2.396599 |
7.475148 |
0.7480798 |
# Diabetes
meta_ps %>%
group_by(Diabetes) %>%
summarise(mean_obs = mean(Observed), mean_shan = mean(Shannon), mean_inv =
mean(InvSimpson), mean_pie = mean(pielou), med_obs = quantile(Observed, probs = c(0.25,0.5,0.75)), med_shan = quantile(Shannon, probs = c(0.25,0.5,0.75)), med_inv = quantile(InvSimpson, probs = c(0.25,0.5,0.75)), med_pie = quantile(pielou, probs = c(0.25,0.5,0.75)))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Diabetes'. You can override using the
## `.groups` argument.
|
24.87500 |
2.178119 |
6.420298 |
0.6975589 |
16 |
1.877506 |
3.961239 |
0.6083842 |
|
24.87500 |
2.178119 |
6.420298 |
0.6975589 |
26 |
2.285719 |
6.116462 |
0.7308268 |
|
24.87500 |
2.178119 |
6.420298 |
0.6975589 |
31 |
2.559207 |
8.882585 |
0.7907660 |
| 0. No |
23.28962 |
1.964015 |
5.541601 |
0.6335592 |
16 |
1.647245 |
3.245733 |
0.5617487 |
| 0. No |
23.28962 |
1.964015 |
5.541601 |
0.6335592 |
22 |
2.078796 |
5.050811 |
0.6643547 |
| 0. No |
23.28962 |
1.964015 |
5.541601 |
0.6335592 |
30 |
2.377931 |
7.109470 |
0.7476157 |
| 1. Yes |
24.94203 |
2.047501 |
5.965145 |
0.6506712 |
18 |
1.768684 |
3.462120 |
0.5821625 |
| 1. Yes |
24.94203 |
2.047501 |
5.965145 |
0.6506712 |
24 |
2.176892 |
5.243518 |
0.6628698 |
| 1. Yes |
24.94203 |
2.047501 |
5.965145 |
0.6506712 |
30 |
2.391890 |
7.337760 |
0.7450489 |
# BPH
meta_ps %>%
group_by(BPH) %>%
summarise(mean_obs = mean(Observed), mean_shan = mean(Shannon), mean_inv =
mean(InvSimpson), mean_pie = mean(pielou), med_obs = quantile(Observed, probs = c(0.25,0.5,0.75)), med_shan = quantile(Shannon, probs = c(0.25,0.5,0.75)), med_inv = quantile(InvSimpson, probs = c(0.25,0.5,0.75)), med_pie = quantile(pielou, probs = c(0.25,0.5,0.75)))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'BPH'. You can override using the `.groups`
## argument.
| No |
23.46559 |
1.952726 |
5.531237 |
0.6277404 |
17.0 |
1.668624 |
3.134892 |
0.5518958 |
| No |
23.46559 |
1.952726 |
5.531237 |
0.6277404 |
23.0 |
2.027607 |
4.780475 |
0.6420510 |
| No |
23.46559 |
1.952726 |
5.531237 |
0.6277404 |
29.5 |
2.329260 |
6.876756 |
0.7508313 |
| Yes |
23.87719 |
2.039073 |
5.835163 |
0.6562695 |
16.0 |
1.746549 |
3.558166 |
0.6080032 |
| Yes |
23.87719 |
2.039073 |
5.835163 |
0.6562695 |
22.5 |
2.179770 |
5.775627 |
0.6852489 |
| Yes |
23.87719 |
2.039073 |
5.835163 |
0.6562695 |
30.0 |
2.424821 |
7.395575 |
0.7533930 |
# Race
meta_ps %>%
group_by(Race) %>%
summarise(mean_obs = mean(Observed), mean_shan = mean(Shannon), mean_inv =
mean(InvSimpson), mean_pie = mean(pielou), med_obs = quantile(Observed, probs = c(0.25,0.5,0.75)), med_shan = quantile(Shannon, probs = c(0.25,0.5,0.75)), med_inv = quantile(InvSimpson, probs = c(0.25,0.5,0.75)), med_pie = quantile(pielou, probs = c(0.25,0.5,0.75)))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Race'. You can override using the
## `.groups` argument.
| 1. White |
23.63333 |
1.994355 |
5.696240 |
0.6421864 |
16 |
1.690993 |
3.322045 |
0.5714168 |
| 1. White |
23.63333 |
1.994355 |
5.696240 |
0.6421864 |
23 |
2.106661 |
5.132279 |
0.6670039 |
| 1. White |
23.63333 |
1.994355 |
5.696240 |
0.6421864 |
30 |
2.393067 |
7.170256 |
0.7500558 |
| 2. Other |
23.89091 |
1.992778 |
5.531125 |
0.6356917 |
17 |
1.694556 |
3.234978 |
0.5542984 |
| 2. Other |
23.89091 |
1.992778 |
5.531125 |
0.6356917 |
26 |
2.178873 |
5.023949 |
0.6834292 |
| 2. Other |
23.89091 |
1.992778 |
5.531125 |
0.6356917 |
30 |
2.418197 |
7.822592 |
0.7618187 |
# Cancer
meta_ps %>%
group_by(Cancer_Type) %>%
summarise(mean_obs = mean(Observed), mean_shan = mean(Shannon), mean_inv =
mean(InvSimpson), mean_pie = mean(pielou), med_obs = quantile(Observed, probs = c(0.25,0.5,0.75)), med_shan = quantile(Shannon, probs = c(0.25,0.5,0.75)), med_inv = quantile(InvSimpson, probs = c(0.25,0.5,0.75)), med_pie = quantile(pielou, probs = c(0.25,0.5,0.75)))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Cancer_Type'. You can override using the
## `.groups` argument.
| None |
23.72274 |
1.983064 |
5.521277 |
0.6362878 |
17 |
1.688364 |
3.163341 |
0.5647222 |
| None |
23.72274 |
1.983064 |
5.521277 |
0.6362878 |
23 |
2.097090 |
5.073466 |
0.6628698 |
| None |
23.72274 |
1.983064 |
5.521277 |
0.6362878 |
30 |
2.390805 |
7.196215 |
0.7430824 |
| Other |
23.00000 |
1.989983 |
5.741079 |
0.6451459 |
17 |
1.741696 |
3.465732 |
0.5802251 |
| Other |
23.00000 |
1.989983 |
5.741079 |
0.6451459 |
23 |
2.090204 |
5.195756 |
0.6742238 |
| Other |
23.00000 |
1.989983 |
5.741079 |
0.6451459 |
28 |
2.409326 |
7.113704 |
0.7709955 |
| Prostate |
24.27692 |
2.054767 |
6.359183 |
0.6617687 |
15 |
1.645795 |
3.447023 |
0.5840326 |
| Prostate |
24.27692 |
2.054767 |
6.359183 |
0.6617687 |
23 |
2.165680 |
5.764774 |
0.6943543 |
| Prostate |
24.27692 |
2.054767 |
6.359183 |
0.6617687 |
30 |
2.469154 |
7.737528 |
0.7687283 |
observed
wilcox.test(meta_ps$Observed ~ meta_ps$Prostatitis)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$Observed by meta_ps$Prostatitis
## W = 19948, p-value = 0.6739
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$Observed ~ meta_ps$Race)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$Observed by meta_ps$Race
## W = 10779, p-value = 0.4206
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$Observed ~ meta_ps$BPH)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$Observed by meta_ps$BPH
## W = 27867, p-value = 0.8458
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$Observed ~ meta_ps$Cancer)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$Observed by meta_ps$Cancer
## W = 25408, p-value = 0.6215
## alternative hypothesis: true location shift is not equal to 0
test_dia <- meta_ps %>% filter(Diabetes != "")
wilcox.test(test_dia$Observed ~ test_dia$Diabetes)
##
## Wilcoxon rank sum test with continuity correction
##
## data: test_dia$Observed by test_dia$Diabetes
## W = 11637, p-value = 0.3013
## alternative hypothesis: true location shift is not equal to 0
Shannon
wilcox.test(meta_ps$Shannon ~ meta_ps$Prostatitis)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$Shannon by meta_ps$Prostatitis
## W = 18921, p-value = 0.685
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$Shannon ~ meta_ps$Race)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$Shannon by meta_ps$Race
## W = 11180, p-value = 0.6995
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$Shannon ~ meta_ps$BPH)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$Shannon by meta_ps$BPH
## W = 25013, p-value = 0.03539
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$Shannon ~ meta_ps$Cancer)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$Shannon by meta_ps$Cancer
## W = 23940, p-value = 0.5792
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(test_dia$Shannon ~ test_dia$Diabetes)
##
## Wilcoxon rank sum test with continuity correction
##
## data: test_dia$Shannon by test_dia$Diabetes
## W = 11823, p-value = 0.4016
## alternative hypothesis: true location shift is not equal to 0
InvSimpson
wilcox.test(meta_ps$InvSimpson ~ meta_ps$Prostatitis)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$InvSimpson by meta_ps$Prostatitis
## W = 18498, p-value = 0.4555
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$InvSimpson ~ meta_ps$Race)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$InvSimpson by meta_ps$Race
## W = 11590, p-value = 0.9671
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$InvSimpson ~ meta_ps$BPH)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$InvSimpson by meta_ps$BPH
## W = 25086, p-value = 0.03987
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$InvSimpson ~ meta_ps$Cancer)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$InvSimpson by meta_ps$Cancer
## W = 23530, p-value = 0.3968
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(test_dia$InvSimpson ~ test_dia$Diabetes)
##
## Wilcoxon rank sum test with continuity correction
##
## data: test_dia$InvSimpson by test_dia$Diabetes
## W = 11872, p-value = 0.4309
## alternative hypothesis: true location shift is not equal to 0
Pielou
wilcox.test(meta_ps$pielou ~ meta_ps$Prostatitis)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$pielou by meta_ps$Prostatitis
## W = 18362, p-value = 0.3921
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$pielou ~ meta_ps$Race)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$pielou by meta_ps$Race
## W = 11642, p-value = 0.9238
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$pielou ~ meta_ps$BPH)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$pielou by meta_ps$BPH
## W = 24996, p-value = 0.03441
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(meta_ps$pielou ~ meta_ps$Cancer)
##
## Wilcoxon rank sum test with continuity correction
##
## data: meta_ps$pielou by meta_ps$Cancer
## W = 22824, p-value = 0.1765
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(test_dia$pielou ~ test_dia$Diabetes)
##
## Wilcoxon rank sum test with continuity correction
##
## data: test_dia$pielou by test_dia$Diabetes
## W = 12403, p-value = 0.8155
## alternative hypothesis: true location shift is not equal to 0
ggplot(meta_ps_long, aes(x = BPH, y = value, fill = BPH)) + geom_violin() + facet_wrap(~variable, scales = "free", nrow = 1) +
theme(strip.text.x = element_text(size = 12))
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

BPH table 1
alpha_bph <- meta_ps %>%
select(BPH, Observed, Shannon, InvSimpson, pielou)
alpha_table <- CreateTableOne(strata = "BPH", data = alpha_bph)
print(alpha_table, nonnormal = c("Observed", "Shannon", "InvSimpson", "pielou"))
## Stratified by BPH
## No Yes p
## n 247 228
## BPH = Yes (%) 0 (0.0) 228 (100.0) <0.001
## Observed (median [IQR]) 23.00 [17.00, 29.50] 22.50 [16.00, 30.00] 0.846
## Shannon (median [IQR]) 2.03 [1.67, 2.33] 2.18 [1.75, 2.42] 0.035
## InvSimpson (median [IQR]) 4.78 [3.13, 6.88] 5.78 [3.56, 7.40] 0.040
## pielou (median [IQR]) 0.64 [0.55, 0.75] 0.69 [0.61, 0.75] 0.034
## Stratified by BPH
## test
## n
## BPH = Yes (%)
## Observed (median [IQR]) nonnorm
## Shannon (median [IQR]) nonnorm
## InvSimpson (median [IQR]) nonnorm
## pielou (median [IQR]) nonnorm
Top Phyla vs LUTS
ps_n <- transform_sample_counts(ps,function(x) 100* x/sum(x))
ps_n_phy <- tax_glom(ps_n, "Phylum")
ps_melt <- psmelt(ps_n_phy)
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
summary_phylum <- ps_melt %>%
group_by(Phylum, LUTS_cat) %>%
summarise(rank_abundance = sum(Abundance), mean_abundance = mean(Abundance), median_abundance = median(Abundance), min_abundance = min(Abundance), max_abundance = max(Abundance)) %>%
arrange(desc(rank_abundance)) %>%
mutate(order = row_number()) %>%
ungroup()
## `summarise()` has grouped output by 'Phylum'. You can override using the
## `.groups` argument.
# Firmicutes
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Firmicutes")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.69319, df = 1, p-value = 0.4051
# Proteobacteria
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Proteobacteria")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.000013704, df = 1, p-value = 0.997
# Actinobacteria
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Actinobacteria")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 1.6718, df = 1, p-value = 0.196
# Bacteroidetes
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Bacteroidetes")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.046117, df = 1, p-value = 0.83
# Fusobacteria
ps_melt <- psmelt(ps_n_phy) %>% filter(Phylum == "Fusobacteria")
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.47191, df = 1, p-value = 0.4921
Top Phyla No/Mild
ps_melt <- psmelt(ps_n_phy)
## Warning in psmelt(ps_n_phy): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
summary_phylum_nomild <- ps_melt %>%
filter(LUTS_cat == "no/mild") %>%
group_by(Phylum) %>%
summarise(rank_abundance = sum(Abundance), mean_abundance = mean(Abundance), median_abundance = median(Abundance), min_abundance = min(Abundance), max_abundance = max(Abundance)) %>%
arrange(desc(rank_abundance)) %>%
mutate(order = row_number()) %>%
ungroup()
Top Phyla Moderate/Severe
summary_phylum_modsevere <- ps_melt %>%
filter(LUTS_cat == "moderate/severe") %>%
group_by(Phylum) %>%
summarise(rank_abundance = sum(Abundance), mean_abundance = mean(Abundance), median_abundance = median(Abundance), min_abundance = min(Abundance), max_abundance = max(Abundance)) %>%
arrange(desc(rank_abundance)) %>%
mutate(order = row_number()) %>%
ungroup()
Top Genera vs LUTS
ps_n_genus <- tax_glom(ps_n, "Genus")
ps_melt <- psmelt(ps_n_genus)
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
summary_genus <- ps_melt %>%
group_by(Genus, LUTS_cat) %>%
summarise(rank_abundance = sum(Abundance), mean_abundance = mean(Abundance), median_abundance = median(Abundance), min_abundance = min(Abundance), max_abundance = max(Abundance)) %>%
arrange(desc(rank_abundance)) %>%
mutate(order = row_number()) %>%
ungroup()
## `summarise()` has grouped output by 'Genus'. You can override using the
## `.groups` argument.
# Staphylococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Staphylococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.7535, df = 1, p-value = 0.3854
# Neisseria
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Neisseria")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.17884, df = 1, p-value = 0.6724
# Corynebacterium
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Corynebacterium")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.0071621, df = 1, p-value = 0.9326
# Prevotella
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Prevotella")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.81943, df = 1, p-value = 0.3653
# Streptococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Streptococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.26292, df = 1, p-value = 0.6081
# Bacteroides
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Bacteroides")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.90236, df = 1, p-value = 0.3422
# Anaerococcus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Anaerococcus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.0010014, df = 1, p-value = 0.9748
# Haemophilus
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Haemophilus")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.14812, df = 1, p-value = 0.7003
# Escherichia/Shigella
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Escherichia/Shigella")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.7038, df = 1, p-value = 0.4015
# Finegoldia
ps_melt <- psmelt(ps_n_genus) %>% filter(Genus == "Finegoldia")
## Warning in psmelt(ps_n_genus): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
kruskal.test(ps_melt$Abundance ~ ps_melt$LUTS_cat)
##
## Kruskal-Wallis rank sum test
##
## data: ps_melt$Abundance by ps_melt$LUTS_cat
## Kruskal-Wallis chi-squared = 0.69829, df = 1, p-value = 0.4034